
**************************************************************
*		Purpose: clean 1997 PSID cross-section  			 *
* 		Creates: psid1997_cleaned.dta 						 *
**************************************************************

// ---- Session setup ----------------------------------------------------
clear all
set more off
// The RNG state is taken from the global macro rdmseed, which is defined
// outside this file.
// NOTE(review): "set rngstate" expects a saved state string rather than a
// plain seed number -- confirm what the global holds.
set rngstate $rdmseed

// All relative paths below are resolved from this folder.
cd "$Mydirectory1/1_DataSources/PSID97_CrossSection"

*-------------------------------------------------------------*
*-------------------------------------------------------------*

	// Read the raw extract, then apply the PSID-provided formats/clean-up.
	do ./rawdata/J320143.do
	do ./rawdata/J320143_formats.do

	// Variables that are not used anywhere below.
	drop V11101 ER10001 ER42001 ER53001 ER30463 ER60001 ER66001 ER47301

	// ---- Relabel: append the wave to labels that do not carry it ----
	label variable V11915  "STATE GREW UP-HD 85"
	label variable V12270  "STATE GREW UP-WF 85"
	label variable ER11843 "EVER LIVE OTHER STATE 97"
	label variable ER11951 "CKPT HEAD BORN IN US 97"
	label variable ER12033 "CKPT WIFE BORN IN US 97"

	// 2009 variables: keep the existing label and add the wave suffix.
	foreach v of varlist ER46444 ER46538 ER46977 ER46979 {
		local oldlab : variable label `v'
		label variable `v' "`oldlab' 09"
	}

	label variable ER57541 "STATE WIFE WAS BORN 13"
	label variable ER57651 "STATE HEAD WAS BORN 13"
	label variable ER64663 "STATE SPOUSE WAS BORN 15"
	label variable ER64802 "STATE HEAD WAS BORN 15"
	label variable ER70736 "STATE SPOUSE WAS BORN 17"
	label variable ER70874 "STATE HEAD WAS BORN 17"
	
	/* Label the per-wave "sequence number" and "relation to head" variables
	   with their wave.

	   The candidates are still discovered with lookfor (and kept in the
	   locals seq_yr / rel_yr), but a variable is relabelled only when it is
	   one of the known wave variables listed here. Previously the wave
	   suffix was carried over from the preceding iteration (or left empty)
	   for any other variable that lookfor happened to match, which silently
	   gave that variable a wrong label. */

	local waves   97      85      09      11      13      15      17
	local seqvars ER33402 ER30464 ER34002 ER34102 ER34202 ER34302 ER34502
	local relvars ER33403 ER30465 ER34003 ER34103 ER34203 ER34303 ER34503

	lookfor sequence 
	local seq_yr "`r(varlist)'"
	
	lookfor relation 
	local rel_yr "`r(varlist)'"
	
	foreach l of local seq_yr {
		// position of this variable in the known list (0 = not a wave variable)
		local pos : list posof "`l'" in seqvars
		if `pos' > 0 {
			local num : word `pos' of `waves'
			label variable `l' "SEQUENCE NUMBER `num'"
		}
	}
	
	foreach l of local rel_yr {
		local pos : list posof "`l'" in relvars
		if `pos' > 0 {
			local num : word `pos' of `waves'
			label variable `l' "RELATION TO HEAD `num'"
		}
	}

	// Move the identifiers, sequence numbers and relation codes to the front.
	order ER30001 ER30002 ER33401 ER33402 ER30464 ER34002 /// 
	      ER34102 ER34202 ER34302 ER34502 ER33403 ER30465 ///
	      ER34003 ER34103 ER34203 ER34303 ER34503

*----------------------------------------------------------------------*
*----------------------------------------------------------------------*

******************************
* Special PSID Restrictions *
******************************

	// 1. Keep only heads and wives/"wives" (cohabitating females)
		  /*Notes: (1) Will exclude individuals whose relationship to head 
		               is first-year cohabitator of head, uncooperative legal 
		               wife, or uncooperative partner. 
				   (2) Most demographic variables are asked only of heads and 
				       wives/"wives" (cohabitating females).
				   (3) In later waves, the PSID changes its terminology---
		               "heads" are referred to as "reference persons" and 
		               wives and "wives" (cohabitating females) are referred to 
		               as spouses/partners, respectively.
		  */
	keep if inlist(ER33403,10,20,22)
	tab ER33403,m 
	tab ER32000 ER33403,m 

	// 2. Keep the correct head 
		  /*Note: In some instances, two people in the same family 
		          are labelled as the "head", but one person
		          is actually non-response in the current wave. As recommended
		          by the PSID, will use the sequence number to figure out who 
		          the correct head is. */
	gen taghead = ER33403==10 

	// Count heads within family (ER30001 x ER33401). The variable is referred
	// to by its full name: the former "tag" only worked through variable-name
	// abbreviation and fails when abbreviation is switched off.
	bysort ER30001 ER33401: egen totalheads = total(taghead==1) 
	tab totalheads, m 
	
	// In families with more than one head, drop records whose 1997 sequence
	// number is above 50 ("fake" heads).
	// NOTE(review): this condition is not restricted to heads, and a missing
	// sequence number also satisfies >50 -- confirm that is intended.
	drop if totalheads>1 & ER33402>50
	
	bysort ER30001 ER33401: egen totalheads2 = total(taghead==1) 
	tab totalheads2, m
	// Enforce, rather than only note, that no family has more than one head.
	assert totalheads2<=1
	drop totalhead*
	

**************************************
* Cross-sectional, ind-level weight  *
**************************************
	// 1997 cross-sectional individual weight, copied under a readable name.
	clonevar weight_psid1997 = ER33438

/*-----------------------------------------------------
		CLEANING
------------------------------------------------------*/

*****************************
* Demographics *
*****************************

*****************
* Sex *
*****************
	generate sex = ER32000
	tabulate sex, missing

*****************
* Age *
*****************

	/*Note: The "age-individual" variable is used here. The "age-head"
	        and "age-wife" variables were used originally but seem to be
	        of lower quality.
	*/

	clonevar age = ER33404
	mvdecode age, mv(999)	// 999 is treated as missing

	generate agesq = age^2

*------------------------------------------------------------------------ 
* Save a dataset here for Appendix E exercise (drop table)
*------------------------------------------------------------------------ 

	// Snapshot taken BEFORE the age restriction; the year variable exists
	// only in the saved copy.
	preserve
		generate year = 1997
		save ./output/psid1997_4dropstable, replace
	restore

*----------------------------------*
*----------------------------------*

	// Restrict to ages 30-50 (missing ages are dropped as well).
	drop if !inrange(age,30,50)
	tabulate age, missing

******************
* Marital Status *
******************
	
	/* Note: Decided that "wives" (cohabitating partners) would
	         be given a "0" for all marital status dummies. */

	// ---- Currently married ----
	// Heads are classified from ER10016; wives (20) and "wives" (22) from
	// ER33407 together with their relation code.
	generate married = .
	replace married = 1 if ER33403==10 & ER10016==1              //head (married)
	replace married = 0 if ER33403==10 & inrange(ER10016,2,5)    //head (unmarried)
	replace married = 1 if ER33403==20 & inrange(ER33407,1,3)    //wives
	replace married = 0 if ER33403==22 & inrange(ER33407,1,3)    //"wives" (cohabitating females)
	tabulate married, missing

	// ---- Remaining status dummies ----
	// Each dummy is 1 for heads whose ER10016 equals the paired code, and 0
	// for all other heads with a code 1-5, for married wives, and for
	// "wives" (cohabitating females).
	local statuses never_married divorced widowed separated
	local codes    2             4        3       5

	forvalues k = 1/4 {
		local st   : word `k' of `statuses'
		local code : word `k' of `codes'

		generate `st' = .
		replace `st' = 1 if ER10016==`code' & ER33403==10
		replace `st' = 0 if `st'==. & ((ER33403==10 & inlist(ER10016,1,2,3,4,5)) | (ER33403==20 & married==1))
		replace `st' = 0 if `st'==. & ER33403==22
		tabulate `st', missing
	}
	
	

******************
* Race *
******************
	/*Note: Race is asked separately of the head and of the wife/"wife",
	        and each person is asked the question 4 times. Race is filled
	        in from the first mention, then from the second mention if it
	        is still missing, and so on.*/

	generate race = .

	foreach role in head wife {

		// Collect this person's race variables by their variable label and
		// keep the list in a global (racehead / racewife).
		ds, has(varlabel "*race of `role'*") insensitive
		di "`r(varlist)'"
		global race`role' "`r(varlist)'"

		// Heads fill from the head variables; everyone else (wives/"wives")
		// fills from the wife variables.
		if "`role'" == "head" {
			local who "ER33403==10"
		}
		else {
			local who "ER33403!=10"
		}

		foreach mention of global race`role' {
			di "`mention'"

			replace race = 1 if inlist(`mention',1,5) & `who' & race==.
			replace race = 2 if `mention'==2 & `who' & race==.
		}

		tabulate race, missing

		if "`role'" == "head" {
			// nothing may have been assigned to non-heads at this point
			assert race==. if ER33403!=10
		}
	}

	generate black = (race==2) if race<.
	tabulate black, missing
	tabulate race, missing
	label variable black "Dummy =1 if Respondent is Black"
	/*Note: The "0" code of the Black binary variable 
	        consists of white individuals and certain
	        categories of Latinx individuals.*/

******************************
* Foreign Born (respondent)  *
******************************

	// Code 9 on either checkpoint variable is treated as missing.
	recode ER11951 ER12033 (9 = .)

	// NOTE(review): both checkpoint variables are labelled "... BORN IN US",
	// yet code 1 is taken to mean foreign born here -- confirm against the
	// PSID codebook.
	generate foreignborn = .
	replace foreignborn = (ER11951==1) if ER33403==10 & !missing(ER11951)
	assert foreignborn==. if ER33403!=10
	replace foreignborn = (ER12033==1) if ER33403!=10 & !missing(ER12033) & foreignborn==.
	tabulate foreignborn, missing

	**Restrict to native-born** (respondents with unknown status are kept)
	keep if foreignborn==0 | foreignborn==.
	
******************************
* Foreign Born (father)  *
******************************

	// Code 0 on the father variable is coded as foreign (1), codes 1-56 as
	// not foreign (0); anything else stays missing.
	generate fatherforeign = .

	//fathers of heads
	replace fatherforeign = cond(ER11813==0, 1, 0) ///
		if ER33403==10 & (ER11813==0 | inrange(ER11813,1,56))

	//fathers of wives or "wives" (cohabitating partners)
	replace fatherforeign = cond(ER11732==0, 1, 0) ///
		if ER33403!=10 & (ER11732==0 | inrange(ER11732,1,56))

	// Whatever is still missing must carry code 99 on the relevant variable.
	assert (ER11813==99 & ER33403==10) | (ER11732==99 & ER33403!=10) if fatherforeign==.
	tabulate fatherforeign, missing
	
**********************************************
* Geographic variables *
**********************************************

label variable ER11842  "STATE HD GREW UP 97"
label variable ER12221C "REGION HD GREW UP 97"
label variable ER12221E "CURRENT REGION 97"

*******************
* State of birth * 
*******************

/*Note: The 1997 variable "state where the respondent was born" 
        appears to be of poor quality. Since this piece of info
        should be time-invariant, the 2013, 2015, and 2017 
        "state born" variables will be used instead. */

	// Cleaned copies of the six state-born variables. The four lists are
	// parallel: source variable, role, label tag, wave.
	local srcs  ER57541 ER57651 ER64663 ER64802 ER70736 ER70874
	local roles spouses heads   spouses heads   spouses heads
	local tags  SP      HD      SP      HD      SP      HD
	local yrs   13      13      15      15      17      17

	forvalues j = 1/6 {
		local src  : word `j' of `srcs'
		local role : word `j' of `roles'
		local tag  : word `j' of `tags'
		local yr   : word `j' of `yrs'
		local new  state_born_`role'`yr'

		generate `new' = `src'
		label variable `new' "STATE `tag' WAS BORN (CLEANED) `yr'"

		tabulate `new', missing
		replace `new' = . if inlist(`new',0,99)	// codes 0 and 99 set to missing
		tabulate `new', missing
	}


	/* Birthplace (bpl): the first non-missing report across the 2013, 2015
	   and 2017 waves. Within a wave the respondent's value is read from the
	   head variable or the spouse variable according to the relation code
	   held in that wave, and only when the wave's sequence number is 1-20.
	   1997 heads are filled first (head variable tried first), then 1997
	   wives/"wives" (spouse variable tried first). */
	generate bpl = .

	foreach grp in head wife {

		if "`grp'" == "head" {
			local who   "ER33403==10"
			local order "heads spouses"
		}
		else {
			local who   "ER33403!=10"
			local order "spouses heads"
		}

		foreach yr in 13 15 17 {

			// stem of that wave's sequence (..02) and relation (..03) variables
			if "`yr'" == "13" {
				local stem ER342
			}
			if "`yr'" == "15" {
				local stem ER343
			}
			if "`yr'" == "17" {
				local stem ER345
			}

			foreach src of local order {
				if "`src'" == "heads" {
					local relok "`stem'03==10"
				}
				else {
					local relok "inlist(`stem'03,20,22)"
				}
				replace bpl = state_born_`src'`yr' if bpl==. & `who' & `relok' & inrange(`stem'02,1,20)
			}
		}

		if "`grp'" == "head" {
			// nothing may have been assigned to non-heads yet
			assert bpl==. if ER33403!=10
		}
	}

	tabulate bpl, missing

********************
* Region of birth * 
********************
	
	// Map the state of birth (bpl) into four regions.
	generate region4_born = .

	// Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island,
	//            Vermont, New Jersey, New York, Pennsylvania
	replace region4_born = 1 if inlist(bpl, 9, 23, 25, 33, 44, 50, 34, 36, 42)

	// Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas,
	//          Minnesota, Missouri, Nebraska, North Dakota, South Dakota
	replace region4_born = 2 if inlist(bpl, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)

	// South: Delaware, District of Columbia, Florida, Georgia, Maryland,
	//        North Carolina, South Carolina, Virginia, West Virginia, Alabama,
	//        Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas
	replace region4_born = 3 if inlist(bpl, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)

	// West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
	//       California, Oregon, Washington, Alaska, Hawaii
	replace region4_born = 4 if inlist(bpl, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)

	tabulate region4_born, missing
	label variable region4_born "Region R born"

******************************************
* Whether R was born in the south * 
******************************************
	// 1 = born in the South, 0 = born in another region, missing if unknown
	generate bornsouth = (region4_born==3) if region4_born<.
	tabulate bornsouth, missing

******************************************
* State and Region R grew up in * 
******************************************

*----------*
* state *
*----------*

	// State the respondent grew up in. Heads: 1997 report, topped up from
	// 1985/2009. Wives/"wives": 1985 and 2009 reports only.
	gen state_childhood = .
	
	replace ER11842 =. if inlist(ER11842,0,99) 

	//Heads 
	replace state_childhood = ER11842 if ER33403==10

	//Wives/"wives" 
	/*Note: State grew up in is only available for the head in 1997, 
        so will use info from 1985 and 2009 to fill in the state 
        where wife/"wife" (cohabitating partner) grew up. */

	// Cleaned copies of the 1985 and 2009 "state grew up" variables
	// (codes 0 and 99 set to missing).
	foreach var of varlist V11915 V12270 ER46538 ER46444 {
		
		if ("`var'"== "V12270" | "`var'"== "ER46444")   {
			local name1 "spouses"
			local name2 "SP"
		}
		
		if ("`var'"== "V11915" |  "`var'"== "ER46538")   {
			local name1 "heads"
			local name2 "HD"		
		}
		
		if ("`var'"== "V11915" | "`var'"== "V12270") local num 85
		if ("`var'"== "ER46538" | "`var'"== "ER46444") local num 09
		
		gen state_grewup_`name1'`num' = `var'
		// Label corrected: these variables hold the state R GREW UP in; the
		// former text ("WAS BORN") was carried over from the state-of-birth loop.
		label var state_grewup_`name1'`num' "STATE `name2' GREW UP (CLEANED) `num'"
		
		tab state_grewup_`name1'`num',m 		
		replace state_grewup_`name1'`num' =. if inlist(state_grewup_`name1'`num',0,99)
		tab state_grewup_`name1'`num',m 
			
	}

		// In each wave the value is read from the spouse or the head variable
		// according to the relation code held in that wave, and only when the
		// wave's sequence number is 1-20.

		//1985 info
		replace state_childhood = state_grewup_spouses85 if state_childhood==. & ER33403!=10 & inlist(ER30465,20,22) & inrange(ER30464,1,20) 
		replace state_childhood = state_grewup_heads85 if state_childhood==. & ER33403!=10 & ER30465==10 & inrange(ER30464,1,20) 
	
		//2009 info 
		replace state_childhood = state_grewup_spouses09 if state_childhood==. & ER33403!=10 & inlist(ER34003,20,22) & inrange(ER34002,1,20) 
		replace state_childhood = state_grewup_heads09 if state_childhood==. & ER33403!=10 & ER34003==10 & inrange(ER34002,1,20) 
		
	/*Extra step: Recover info on state grew up for HEADS who did not 
				  report info in 1997 but did report info in 1985 or 2009.
	*/
	
		//1985 info (heads)
		replace state_childhood = state_grewup_heads85 if state_childhood==. & ER33403==10 & ER30465==10 & inrange(ER30464,1,20) 
		replace state_childhood = state_grewup_spouses85 if state_childhood==. & ER33403==10 & inlist(ER30465,20,22) & inrange(ER30464,1,20) 
		
		//2009 info (heads)
		replace state_childhood = state_grewup_heads09 if state_childhood==. & ER33403==10 & ER34003==10 & inrange(ER34002,1,20) 
		replace state_childhood = state_grewup_spouses09 if state_childhood==. & ER33403==10 & inlist(ER34003,20,22) & inrange(ER34002,1,20) 
		
	tab state_childhood, m


*----------*
* region *
*----------*

	// 1997 head report: code 5 is folded into 4, anything above 4 is set
	// to missing (foreign country or dk/na).
	clonevar reg_grewup_heads97 = ER12221C
	label variable reg_grewup_heads97 "REGION HD GREW UP 97 (CLEANED)"

	replace reg_grewup_heads97 = 4 if reg_grewup_heads97==5
	replace reg_grewup_heads97 = . if reg_grewup_heads97>4
	tabulate reg_grewup_heads97, missing

	generate region4_childhood = .

	//Heads
	replace region4_childhood = reg_grewup_heads97 if ER33403==10
	assert region4_childhood==. if ER33403!=10

	//Wives/"wives"
	/*Note: Region grew up in is only available for the head in 1997,
	        so info from 1985 and 2009 is used to fill in the region
	        where the wife/"wife" (cohabitating partner) grew up. */

	foreach yr in 85 09 {

		// source variables plus that wave's relation / sequence variables
		if "`yr'" == "85" {
			local hdsrc V12383
			local wfsrc V12387
			local rel   ER30465
			local seq   ER30464
		}
		else {
			local hdsrc ER46977
			local wfsrc ER46979
			local rel   ER34003
			local seq   ER34002
		}

		// cleaned copies: 5 folded into 4; 0 and anything above 4 set to missing
		foreach role in heads wives {
			if "`role'" == "heads" {
				local src `hdsrc'
				local tag "HD"
			}
			else {
				local src `wfsrc'
				local tag "WF"
			}
			local new reg_grewup_`role'`yr'

			generate `new' = `src'
			label variable `new' "REGION `yr' `tag' GREW UP (CLEANED)"

			replace `new' = 4 if `new'==5
			replace `new' = . if `new'>4 | `new'==0
			tabulate `new', missing
			tabulate `src', missing
		}

		// 1997 wives/"wives": take the head value if they were a head in that
		// wave, the wife value if they were a wife/"wife"; in both cases only
		// when the wave's sequence number is 1-20.
		replace region4_childhood = reg_grewup_heads`yr' if region4_childhood==. & ER33403!=10 & `rel'==10 & inrange(`seq',1,20)
		replace region4_childhood = reg_grewup_wives`yr' if region4_childhood==. & ER33403!=10 & inlist(`rel',20,22) & inrange(`seq',1,20)
		tabulate region4_childhood, missing
	}

	/*Extra step: Recover info on region grew up for HEADS who did not
	              report info in 1997 but did report info in 1985 or 2009.
	*/
	foreach yr in 85 09 {

		if "`yr'" == "85" {
			local rel ER30465
			local seq ER30464
		}
		else {
			local rel ER34003
			local seq ER34002
		}

		replace region4_childhood = reg_grewup_heads`yr' if region4_childhood==. & ER33403==10 & `rel'==10 & inrange(`seq',1,20)
		replace region4_childhood = reg_grewup_wives`yr' if region4_childhood==. & ER33403==10 & inlist(`rel',20,22) & inrange(`seq',1,20)
		tabulate region4_childhood, missing
	}
	
	
********************************
* Region of current residence * 
********************************
	// Current region: code 5 is folded into 4, anything above 4 set to missing.
	generate region4 = cond(ER12221E==5, 4, ER12221E)
	replace region4 = . if region4>4
	tabulate region4, missing
	tabulate ER12221E, missing

	// One shared value label for the three 4-category region variables.
	label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
	foreach rv in region4 region4_childhood region4_born {
		label values `rv' region_l
	}
	tabulate region4_childhood, missing
	tabulate region4, missing
	tabulate region4_born, missing

************************************************
* Whether R has moved state/region since birth * 
************************************************

*----------*
* state *
*----------*

	// Current state of residence: codes 0 and 99 are set to missing.
	replace ER10004 =. if inlist(ER10004,0,99)
	ren ER10004 current_state97
	
	/* moved_state = 1 if the current state differs from the state R grew up
	   in or from the state R was born in. Because a missing value compares
	   as "not equal" to any state, the cases with missing information are
	   repaired below:
	     - both origin states missing          -> missing
	     - one origin missing, other = current -> 0
	     - current state missing               -> missing (previously such
	       respondents were coded as movers whenever an origin state was known)
	*/
	gen moved_state = (state_childhood!=current_state97 | bpl!=current_state97)
	replace moved_state =. if (state_childhood==. & bpl==.) 
	replace moved_state =0 if (current_state97==state_childhood & bpl==. & current_state97!=.)
	replace moved_state =0 if (current_state97==bpl & state_childhood==. & current_state97!=.)
	replace moved_state =. if current_state97==.
	tab moved_state, m 

*----------*
* region *
*----------*
	
	// Same construction and the same missing-value rules at the region level.
	gen moved_region = (region4~=region4_born | region4~=region4_childhood)
	replace moved_region =. if (region4_born==. & region4_childhood==.)
	replace moved_region =0 if (region4==region4_born & region4_childhood==. & region4~=.)
	replace moved_region =0 if (region4==region4_childhood & region4_born==. & region4~=.)
	replace moved_region =. if region4==.
	tab moved_region, m 

*-------------------------------------------------------------------------------------------------*
*-------------------------------------------------------------------------------------------------*

*******************
* Self-Employment *
*******************
	
	// Code 9 is treated as missing on both source variables; each variable
	// is tabulated before and after the change.
	foreach src of varlist ER10086 ER10568 {
		tabulate `src', missing
		recode `src' (9 = .)
		tabulate `src', missing
	}

	/*Note: Only code 3 counts as self-employed. A respondent who reports
	        working for *both* someone else and him/herself is coded as
	        not self-employed.*/

	generate selfemployed = .
	replace selfemployed = (ER10086==3) if ER33403==10 & !missing(ER10086)                        //head
	replace selfemployed = (ER10568==3) if ER33403!=10 & !missing(ER10568) & selfemployed==.      //wife/"wife" (cohabitating partner)
	tabulate selfemployed, missing
	
*******************************
* Education (Respondent) *
*******************************

	// Code 99 is treated as missing on both source variables.
	foreach var of varlist ER12222 ER12223  {
		tab `var',m 
		replace `var' =. if `var'==99 
		tab `var',m 
	}
	
	/* eduR: 7-category attainment built from the years-of-schooling code.
	   Heads use ER12222; wives/"wives" (cohabitating partner) use ER12223. */
	gen eduR =.
	
	foreach role in head wife {

		if "`role'"=="head" {
			local src ER12222
			local who "ER33403==10"
		}
		else {
			local src ER12223
			local who "ER33403!=10"
		}

		replace eduR =0 if eduR==. & `src'==0 & `who' /*no schooling*/
		replace eduR =1 if eduR==. & inrange(`src',1,7) & `who' /*some grade school*/
		replace eduR =2 if eduR==. & `src'==8 & `who' /*graduated grade school*/
		replace eduR =3 if eduR==. & inrange(`src',9,11) & `who' /*some high school*/
		replace eduR =4 if eduR==. & `src'==12 & `who' /*completed high school*/
		replace eduR =5 if eduR==. & inrange(`src',13,15) & `who' /*some college*/
		replace eduR =6 if eduR==. & (`src'>=16 & `src'<.) & `who' /*completed college and beyond*/

		if "`role'"=="head" {
			// nothing may have been assigned to non-heads yet
			assert eduR ==. if ER33403!=10 
		}
	}
	tab eduR,m 

*Binned
	// One representative number of years per eduR category.
	gen yrsschool_bin=. 
	replace yrsschool_bin = 0 if eduR==0
	replace yrsschool_bin = 6 if eduR==1  
	replace yrsschool_bin = 8 if eduR==2
	replace yrsschool_bin = 10 if eduR==3
	replace yrsschool_bin = 12 if eduR==4
	replace yrsschool_bin = 14 if eduR==5
	replace yrsschool_bin = 16 if eduR==6
	tab yrsschool_bin , m
	label var yrsschool_bin "Years of school, binned" 
	
	// The if-qualifier now names eduR in full. The former "edu" resolved to
	// eduR only through variable-name abbreviation, which fails when
	// abbreviation is off or once another edu* variable exists.
	gen hs_ed = eduR>=4 if eduR<. 
	tab hs_ed, m
	label var hs_ed "HS educated" 

	gen coll_ed = eduR>=6 if eduR<.
	tab coll_ed, m 
	label var coll_ed "Coll educated" 

***********************
* Education (Parents) *
***********************

	/* Parental education on the same 0-6 scale as the respondent's.
	   Heads and wives/"wives" report on their own parents in separate
	   variables. Category 0 additionally requires the paired variable
	   (cond0) to equal 5. */
	generate edu_dad = .
	generate edu_mom = .

	foreach person in head wife {
		foreach name in dad mom {

			// var   = parent's education code
			// cond0 = extra condition needed for category 0
			// cond  = which respondents this report belongs to
			// guard = "only if still missing" (used for wives only)
			if "`person'" == "head" {
				local cond  "ER33403==10"
				local guard ""
				if "`name'" == "dad" {
					local var   "ER11816"
					local cond0 "ER11815==5"
				}
				else {
					local var   "ER11824"
					local cond0 "ER11823==5"
				}
			}
			else {
				local cond  "ER33403!=10"
				local guard "edu_`name'==. &"
				if "`name'" == "dad" {
					local var   "ER11735"
					local cond0 "ER11734==5"
				}
				else {
					local var   "ER11743"
					local cond0 "ER11742==5"
				}
			}

			replace edu_`name' = 0 if `guard' (`var'==0 & `cond0') & `cond'  /*no schooling*/
			replace edu_`name' = 1 if `guard' `var'==1 & `cond'              /*some grade school*/
			replace edu_`name' = 2 if `guard' `var'==2 & `cond'              /*completed grade school*/ //Note: unable to split grades 6-7 from grade 8.
			replace edu_`name' = 3 if `guard' `var'==3 & `cond'              /*some high school*/
			replace edu_`name' = 4 if `guard' `var'==4 & `cond'              /*completed high school*/
			replace edu_`name' = 5 if `guard' (`var'==5 | `var'==6) & `cond' /*some college OR high school + nonacademic training*/
			replace edu_`name' = 6 if `guard' (`var'==7 | `var'==8) & `cond' /* college+ */

			// source codes that were left unclassified
			tabulate `var' if edu_`name'==. & `cond', missing
		}
	}

	tabulate edu_dad, missing
	tabulate edu_mom, missing
		
		

*Binned 
	// For each parent: years of school implied by the 0-6 category, plus
	// high-school and college dummies (missing when the category is missing).
	local yrsmap 0 6 8 10 12 14 16	// years assigned to categories 0..6

	foreach name in mom dad {

		generate edu_`name'_bin = .
		forvalues cat = 0/6 {
			local yrs : word `=`cat'+1' of `yrsmap'
			replace edu_`name'_bin = `yrs' if edu_`name'==`cat'
		}
		tabulate edu_`name'_bin, missing
		label variable edu_`name'_bin "`name' Years of school from bins"

		generate `name'_hs_ed = edu_`name'>=4 if edu_`name'<.
		tabulate `name'_hs_ed, missing

		generate `name'_coll_ed = edu_`name'>=6 if edu_`name'<.
		tabulate `name'_coll_ed, missing

		label variable `name'_hs_ed "`name' HS educated"
		label variable `name'_coll_ed "`name' College educated"
	}
	

*********************************
* # of persons in R's household *
*********************************

* # of children 0-17 living in respondent's household
	generate R_totnumkids_0to17_livinginhh = ER10012
	label variable R_totnumkids_0to17_livinginhh "Total # of kids (0-17) living in R's hh"
	tabulate R_totnumkids_0to17_livinginhh, missing

* Total # of people living in respondent's household
	generate R_hhsize_plusR  = ER10008
	label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
	tabulate R_hhsize_plusR, missing

	generate R_hhsize_minusR = R_hhsize_plusR - 1
	label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"
	tabulate R_hhsize_minusR, missing

* Whether R is a sibling
	// Attach the sibling map by person identifier (ER30001 ER30002). A
	// respondent found in the map is flagged as a sibling; map-only records
	// and the map's own columns are discarded afterwards.
	sort ER30001 ER30002
	merge 1:1 ER30001 ER30002 using ./output/sib_map_wide.dta

	generate sib_psid1997 = (_merge==3)
	label variable sib_psid1997 "R is a sibling & present in 1997 wave"

	drop if _merge==2
	drop _merge SEX* ID68* PN* TYPE*

******************
* Unions *
******************

/*Note: If the respondent's job is covered by 
        a union contract, the respondent will
        be coded as in a union. */

	// unionR: 1 if the source code is 1, 0 if it is 5 or 0. Heads read
	// ER10089; wives/"wives" (cohabitating partners) read ER10571.
	generate unionR = .

	foreach role in head wife {

		if "`role'" == "head" {
			local src ER10089
			local who "ER33403==10"
		}
		else {
			local src ER10571
			local who "ER33403!=10"
		}

		replace unionR = 1 if unionR==. & `src'==1 & `who'
		replace unionR = 0 if unionR==. & inlist(`src',0,5) & `who'

		// anything still missing for this group must be code 8 or 9
		assert inlist(`src',8,9) if unionR==. & `who'

		if "`role'" == "head" {
			// nothing may have been assigned to non-heads yet
			assert unionR==. if ER33403!=10
		}
	}
	tabulate unionR, missing

*******************
* Veterans *
*******************
	// not available in this wave: the variable is created as all-missing
	generate veteran = .

*----------------------------------------------------------------------------------*
*----------------------------------------------------------------------------------*

/*------------------------------------------------------------
	Crosswalking 1997 PSID occupations (retrospective)
	to 1950 ANES occupations for fathers and mothers
-------------------------------------------------------------*/

	rename ER12161 head_dad_occ
	rename ER12152 wife_dad_occ
	rename ER12163 head_mom_occ
	rename ER12154 wife_mom_occ

	/* For each of the four parent-occupation variables: copy it to
	   census1970 (the crosswalk's key), merge the crosswalk, and keep its
	   fatheroccej column under a parent- and respondent-specific name
	   (fatheroccej_head, fatheroccej_wife, motheroccej_head,
	   motheroccej_wife). */
	foreach par in dad mom {

		local parent = cond("`par'"=="dad", "father", "mother")

		foreach who in head wife {

			generate census1970 = `who'_`par'_occ
			label variable census1970 "==`who'_`par'_occ (renamed to facilitate a merge)"

			// Prepare a renamed copy of the crosswalk in a temporary file.
			preserve
				use ../Crosswalks/Crosswalk_1970Census_toANES.dta, clear
				rename fatheroccej `parent'occej_`who'
				label variable `parent'occej_`who' "`parent' occ (`who')"
				sort census1970
				tempfile crossw
				save `crossw'
			restore

			sort census1970
			merge m:1 census1970 using `crossw'

			// Unmatched respondents may only have a missing code or code 999.
			assert census1970==. | census1970==999 if _merge==1
			tabulate census1970 if _merge==1, missing
			drop if _merge==2
			drop _merge census1970
		}
	}

	//Harmonized, coarsened father occupation and mother occupation:
	//heads take the head-side value, everyone else the wife-side value.
	foreach parent in father mother {
		generate `parent'occej = .
		replace `parent'occej = `parent'occej_head if ER33403==10
		replace `parent'occej = `parent'occej_wife if ER33403!=10 & `parent'occej==.
		assert `parent'occej==. if (`parent'occej_head==. & ER33403==10) | (`parent'occej_wife==. & ER33403!=10)
		tabulate `parent'occej, missing
	}
	
/*Note: Other than the "0" and "999" codes, there is no 
        skip logic to the parental retrospective occupation 
        questions. See the PSID data center for info on 
        interpretation of these codes. */

*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************
	
	/* <parent>_notworking
	     = 1 when the harmonized occupation is missing and the raw occupation
	         code is 0 (on the wife side additionally requiring ER10011 != 0)
	     = 0 when the harmonized occupation is present
	     = missing otherwise */
	foreach parent in father mother {

		// short tag used in the raw occupation variable names
		local p = cond("`parent'"=="father", "dad", "mom")

		generate `parent'_notworking = .
		replace `parent'_notworking = 1 if `parent'occej==. & ((head_`p'_occ==0 & ER33403==10) | (wife_`p'_occ==0 & ER10011!=0 & ER33403!=10))
		replace `parent'_notworking = 0 if `parent'occej!=. & `parent'_notworking==.
		tabulate `parent'_notworking, missing
	}

	/*Note: There are no occupation codes for "retired", "disabled", 
			or "student". All reasons for a parent not working are 
			covered by the "0" or "999" occupation codes.*/


***************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials *
***************************************************************************
/*Note: Info on whether R's mom or dad were self-employed when R 
        was growing up is not available in 1997 or in any wave. */

*------------------------------------------
* Head of hh dummies + father_imputed dummy	
*------------------------------------------

	// Clean the "who R lived with" variables of every wave: codes 0, 8 and 9
	// become missing and the label gets a "(cleaned) <wave>" suffix. The two
	// lists are parallel (variable, wave).
	local lwvars ER11847 ER46447 ER46541 ER51808 ER51902 ER57547 ER57657 ER64669 ER64808 ER70742 ER70880
	local lwyrs  97      09      09      11      11      13      13      15      15      17      17
	local nvars : word count `lwvars'

	forvalues i = 1/`nvars' {

		local v  : word `i' of `lwvars'
		local yr : word `i' of `lwyrs'

		// only the 1997 variable gets the extra "-HD" tag in its label
		local hdtag = cond("`v'"=="ER11847", "-HD", "")

		tabulate `v', missing
		replace `v' = . if inlist(`v',0,8,9) //dk/na/refused/inap: no wife/"wife" in family unit

		local oldlab : variable label `v'
		label variable `v' "`oldlab'`hdtag' (cleaned) `yr'"

		tabulate `v', missing
	}


	/* headofhh_father: 1 if the "who R lived with when growing up" item
	   equals 1, 0 for any other non-missing answer.

	   Coding convention (following other surveys):
			--headofhh_father =1 if R lived with both parents when growing up
			  or if R reports having lived with only a father.
			--headofhh_mother =1 if R lived with a mother but not a father
			--headofhh_othermale =1 if R lived with a male relative and not
			  with R's parents
			--headofhh_otherfemale =1 if R lived with a female relative and
			  not with R's parents or a male relative.
	*/
	gen headofhh_father = .

	// 1997 heads: the item is asked directly in 1997 (ER11847)
	replace headofhh_father = (ER11847==1) if ER33403==10 & !missing(ER11847)

	/* The 1997 wave only asks the head, so later waves are used as fallbacks.
	   Each list holds, for one wave:
	       wife-item  head-item  relationship-to-head  sequence-number       */
	local src09 ER46447 ER46541 ER34003 ER34002
	local src11 ER51808 ER51902 ER34103 ER34102
	local src13 ER57547 ER57657 ER34203 ER34202

	/* Pass 1 fills 1997 non-heads (wife/"wife", cohabitating partner);
	   pass 2 recovers 1997 heads who gave no answer in 1997.
	   Waves are visited 2009 -> 2011 -> 2013 and only still-missing values
	   are filled, so the earliest available report wins. Within a wave,
	   non-heads try the wife item first, heads try the head item first. */
	foreach pass in nonhead head {

		if "`pass'"=="nonhead" {
			local role97 "ER33403!=10"
			local tryorder wife head
		}
		else {
			local role97 "ER33403==10"
			local tryorder head wife
		}

		foreach wv in 09 11 13 {

			local q_wife : word 1 of `src`wv''
			local q_head : word 2 of `src`wv''
			local rel    : word 3 of `src`wv''
			local seq    : word 4 of `src`wv''

			// Which later-wave role makes each item applicable to the person
			local when_wife "inlist(`rel',20,22)"
			local when_head "`rel'==10"

			foreach asked of local tryorder {
				replace headofhh_father = (`q_`asked''==1) ///
					if headofhh_father==. & `q_`asked''<. & `role97' ///
					 & `when_`asked'' & inrange(`seq',1,20)
			}
		}
	}

	// Respondents living with a single dad: father's occupation known,
	// mother's not, and the item did not already say "father"
	replace headofhh_father = 1 if headofhh_father==0 & motheroccej==. & fatheroccej!=.

	tab headofhh_father, m

	// Remaining household-head dummies.
	//   - all three are 0 when the father is coded as head
	//   - when the father is not head and only the mother's occupation is
	//     known, the mother dummy is 1 and the two "other" dummies are 0
	//   - everything else stays missing
	local momonly "headofhh_father==0 & motheroccej!=. & fatheroccej==."

	foreach who in mother othermale otherfemale {
		local ismom = ("`who'"=="mother")

		gen headofhh_`who' = 0 if headofhh_father==1
		replace headofhh_`who' = `ismom' if `momonly'
		tab headofhh_`who', m
	}

	label var headofhh_father "Head of hh when R was growing up was R's father"
	label var headofhh_mother "Head of hh when R was growing up was R's mother"
	label var headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
	label var headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"

	// Alternate father-as-head dummy: identical to headofhh_father, except
	// that respondents with no household-head information but a reported
	// father's occupation are assumed to have lived with their father.
	gen headofhh_father_imputed = cond(headofhh_father==. & fatheroccej!=., 1, headofhh_father)
	label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"
	tab headofhh_father_imputed, m


* Father was a farm laborer or farm operator (harmonized codes 71 / 81);
* left missing when the father's occupation is missing
	gen fatherfarm = inlist(fatheroccej, 71, 81) if fatheroccej!=.
	tab fatherfarm, m

/*--------------------------------------------------------------------
	Crosswalking 1997 PSID occupations to 1950 ANES
	occupations for adult respondents
---------------------------------------------------------------------*/

	rename (ER12085 ER12116) (head_occ wife_occ)

	foreach who in head wife {

		// Stage a copy of the crosswalk whose harmonized column (called
		// fatheroccej in the crosswalk file) is renamed to occRej_<who>,
		// so it cannot collide with variables already in memory.
		preserve
			use ../Crosswalks/Crosswalk_1970Census_toANES.dta, clear
			rename fatheroccej occRej_`who'
			sort census1970
			tempfile xwalk
			save `xwalk'
		restore

		// Temporary merge key holding this person's occupation code
		gen census1970 = `who'_occ
		label var census1970 "==`who'_occ (renamed to facilitate a merge)"
		sort census1970

		merge m:1 census1970 using `xwalk'

		// Only missing or 999 codes may fail to find a crosswalk row
		assert census1970==999 | census1970==. if _merge==1
		tab census1970 if _merge==1

		// Discard crosswalk-only rows, then the merge bookkeeping
		drop if _merge==2
		drop _merge census1970
	}

/*Note: Other than the "0" and "999" codes, there is no 
        skip logic to the respondent occupation 
        questions. See the PSID data center for info on 
        interpretation of these codes. */

***************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials  * 
***************************************************************************

	// Recode harmonized occupation 28 to 21 for the self-employed; the
	// tabulations before and after show how many code-28 cases remain.
	foreach who in head wife {
		tab selfemployed if occRej_`who'==28, m
		replace occRej_`who' = 21 if selfemployed==1 & occRej_`who'==28
		tab selfemployed if occRej_`who'==28, m
	}

	// Harmonized, coarsened respondent occupation: head-side value for 1997
	// heads (ER33403==10), wife-side value for everyone else
	gen occRej = cond(ER33403==10, occRej_head, occRej_wife)
	assert occRej==. if (ER33403==10 & occRej_head==.) | (ER33403!=10 & occRej_wife==.)
	tab occRej, m
			
*----------------------------------------------------------------------*
*----------------------------------------------------------------------*

/*------------------------------------------------
	Family Income							
-------------------------------------------------*/

* Continuous family income: negative values floored at zero, then logged
* (a zero has no logarithm, so those observations end up missing)
	clonevar totfaminc_negs_as_zero = ER12079
	replace totfaminc_negs_as_zero = 0 if ER12079<0

	gen lnfaminc_nobin = log(totfaminc_negs_as_zero)
	
* Create binned measure
/* Note:
		The midpoint of each bin is assigned, with the exception of:
 			(1) the last bin, whose bottom value is multiplied by 1.25 
 	    	(as last bin is always "open-ended"--i.e. "25,000 or more")
 			(2) the bottom bin, whose top value is multiplied by 0.75
*/
	// Interior bins are [lower, upper) and receive their midpoint
	local lower 10000 15000 25000 35000 45000 55000 65000 75000  90000
	local upper 15000 25000 35000 45000 55000 65000 75000 90000 110000

	// Bottom bin (< 10k): top value times 0.75
	gen fam_inc = 0.75*10000 if ER12079<10000

	forvalues b = 1/9 {
		local lo : word `b' of `lower'
		local hi : word `b' of `upper'
		replace fam_inc = (`lo' + `hi')/2 if ER12079>=`lo' & ER12079<`hi'
	}

	// Open-ended top bin (>= 110k): bottom value times 1.25; missing income stays missing
	replace fam_inc = 1.25*110000 if ER12079>=110000 & !missing(ER12079)
	tab fam_inc, m

	// Indicators for the two extreme bins, defined only where fam_inc exists
	gen bottomcoded_son = (fam_inc==0.75*10000) if !missing(fam_inc)
	tab bottomcoded_son, m

	gen topcoded_son = (fam_inc==1.25*110000) if !missing(fam_inc)
	tab topcoded_son, m

	label var bottomcoded_son "Respondent family income, bottom coded" 
	label var topcoded_son "Respondent family income, top coded" 
	
/* Convert family income variable to 1950 dollars
   using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 

   Note: In the 1997 questionnaire, the total family income 
         question asks about 1996 income. Will use CPI in 1996.
*/ 
	// CPI levels used to deflate: the income question refers to 1996, the target year is 1950
	gen CPI1950 = 24.1
	gen CPI1996 = 156.9

	// Binned income expressed in 1950 dollars
	gen fam_inc_real = fam_inc * (CPI1950/CPI1996)
	label var fam_inc_real "Binned Family income, in 1950 dollars" 

	// Log of the binned, deflated income
	gen lnfaminc = ln(fam_inc_real)
	label var lnfaminc "Logged family income, binned and real" 

	
/*---------------------------------
	Birth cohorts							
----------------------------------*/

	// Survey year is constant in this cross-section
	gen year = 1997
	tab year, m
	label var year "Survey year" 

	// Year of birth, with code 9999 treated as missing
	gen dob = ER33406 if ER33406!=9999
	tab dob, m
	label var dob "Year of birth" 

	// Decade of birth; only the 1940s, 1950s and 1960s are assigned
	gen decade = .
	forvalues d = 1940(10)1960 {
		replace decade = `d' if inrange(dob, `d', `d'+9)
	}
	tab decade, m
	label var decade "Decade of birth"

	// One indicator per decade (decade_1, decade_2, ...)
	tab decade, gen(decade_)

/*------------------------------------------------------------------------------------------
	FINAL RESTRICTION: RANDOMLY CHOOSE 1 PERSON (HEAD OR WIFE/"WIFE")
	                   IN 2-PERSON HOUSEHOLDS TO BE IN THE SAMPLE			
------------------------------------------------------------------------------------------*/ 

	// Size of each ER30001 x ER33401 group (ER33401 = 1997 family id).
	// total() is the documented egen function and matches its use below;
	// sum() is only the older name for the same thing.
	bysort ER30001 ER33401: egen numpersons_subfamily = total(1)
	tab numpersons_subfamily, m
	
	// A person is usable when every variable the analysis needs is non-missing
	gen good4analysis = (race!=. & fatheroccej!=. & region4_childhood!=. & fam_inc_real!=.)
	tab good4analysis ,m 
	
	// Number of usable persons within each group
	bysort ER30001 ER33401: egen total_good4analysis_subfamily = total(good4analysis==1)
	tab total_good4analysis_subfamily,m 
	
	// Fix the row order on the unique person id before drawing random numbers,
	// so that a given RNG state always assigns the same draw to the same person.
	sort ER30001 ER30002 //MUST ALWAYS SORT BY UNIQUE ID COMBO
	
	// Store the draw as double: a float keeps only ~7 significant digits, so two
	// persons in one family could round to the same value and the sort below
	// would then break the tie arbitrarily (not reproducibly).
	gen double random = runiform() //Generate random # between 0 and 1 	
	sort ER30001 ER33401 random //Note: 1997 family id (ER33401) tells us who lived together/is part of the same subfamily unit.
	by ER30001 ER33401: gen analysis_sample = _n==1 //Grab first person listed in each hh. Order is determined by randomly assigned value.

	/*Note: in 2-person families, want to make sure that we grab the person
	        (if there's only one) that has all the necessary info available. */	
	replace analysis_sample = 1 if (good4analysis==1 & total_good4analysis_subfamily==1 & numpersons_subfamily==2) & analysis_sample==0
	replace analysis_sample = 0 if (good4analysis==0 & total_good4analysis_subfamily==1 & numpersons_subfamily==2) & analysis_sample==1
	tab analysis_sample,m 
	
	keep if analysis_sample==1
	
	// Verify (not just eyeball) that exactly one person per 1997 family remains
	bysort ER30001 ER33401: egen numpersons_subfamily2 = total(1)
	tab numpersons_subfamily2, m
	assert numpersons_subfamily2==1
	drop numpersons_subfamily2
 	
/*-----------------------------------------
		Save			
------------------------------------------*/ 
	
	// Indicator for respondents whose (binned, real) family income is missing
	gen faminc_missing = fam_inc_real==.
	label var faminc_missing "Family income missing"

	// Person identifier, coded according to PSID instructions.
	// Stored as long: the default float represents integers exactly only up
	// to 16,777,216, which leaves little headroom for a 7-digit id.
	gen long id_psid1997 = (ER30001*1000) + ER30002

	// Report duplicates in the log, then stop with an error if the id is not
	// unique (or is missing) instead of relying on a visual check.
	duplicates report id_psid1997
	isid id_psid1997

	sort id_psid1997
	order id_psid1997 weight_psid1997
	compress
	save ./output/psid1997_cleaned, replace 
